浏览量 5206
2018/09/21 10:52
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
./flowdata.log
2017-02-02 15:29:19,390 [views:111:ebitpost] [INFO]- ebitapi: http://218.85.118.8:8000/api/user/query, ebit response: src_ip: 110.86.101.119:63688, content: {"data":{"basic_rate_down":20480,"basic_rate_up":2048,"dial_acct":"fj::059391534153","max_linerate_down":102400,"max_linerate_up":102400},"message":"提速判断成功","result":0}
'''
'''
./ipdb_cn.txt
1.1.1.0 中国 广东 深圳
1.1.2.0 中国 广东 深圳
...
233.233.2.0 中国 新疆 乌鲁木齐
'''
import re,heapq,threading
from collections import Counter
from multiprocessing import Pool
# Maps the third field of each database line (the province in the sample
# data) to the set of "a.b.c" address prefixes listed for it.
dic = {}


def readconfig(path='./ipdb_cn.txt'):
    """Load the IP database at *path* into the module-level ``dic``.

    Every usable line holds at least three whitespace-separated fields.
    Field 0 is a dotted address whose last component is dropped to form the
    stored prefix; field 2 is used as the dictionary key. Lines with fewer
    than three fields (blank lines, placeholders such as ``...``) are
    skipped instead of raising ``IndexError``.

    Args:
        path: Location of the database file. Defaults to the path the
            script has always used.

    Returns:
        None. ``dic`` is updated in place.
    """
    # Function-scope import: io.open accepts ``encoding`` on both Python 2
    # and Python 3, so keys are decoded text on either interpreter.
    import io

    with io.open(path, mode='r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                continue
            # "1.1.1.0" -> "1.1.1"; a value without any dot yields "".
            prefix = fields[0].rpartition('.')[0]
            dic.setdefault(fields[2], set()).add(prefix)
# Load the IP database on a background thread so it overlaps with the log
# scan; the thread is joined later, before the lookup phase needs ``dic``.
t = threading.Thread(target=readconfig)
t.start()

# Measure the log size so it can be split into byte ranges. Binary mode makes
# the reported position a plain byte count, and the ``with`` block closes the
# handle, which the original left open.
with open('./flowdata.log', 'rb') as tf:
    tf.seek(0, 2)  # whence=2: position relative to the end of the file
    total = tf.tell()
def run(start, end, path='./flowdata.log'):
    """Collect the distinct ``_ip:`` addresses from one byte range of the log.

    A range owns every line whose first byte lies in ``[start, end)``. When
    *start* is not 0 the function backs up one byte and discards through the
    next newline, so a line straddling the boundary is handled only by the
    range in which it begins. Consecutive ranges therefore cover each line
    exactly once and never scan a line fragment.

    Only the first ``_ip:`` occurrence on a line is recorded.

    Args:
        start: Byte offset at which this range begins.
        end: Byte offset at which this range ends (exclusive for line starts).
        path: Log file to scan. Defaults to the path the script has always
            used.

    Returns:
        A set of dotted-quad address strings.
    """
    # The original pattern was missing the closing parenthesis of its capture
    # group and raised re.error. A bytes pattern is used because the file is
    # read in binary mode (see below).
    ip_re = re.compile(br'_ip:\s?([0-9]+(?:\.[0-9]+){3})')
    # Offsets may arrive as floats when the caller divides with "/".
    start = int(start)
    end = int(end)
    found = set()
    # Binary mode + readline(): seeking to an arbitrary byte offset is safe
    # even inside a multi-byte character, and the position stays exact.
    # Calling tell() while iterating the file object is unreliable (read-ahead
    # on Python 2, an error on Python 3 text files).
    with open(path, 'rb') as f:
        if start > 0:
            f.seek(start - 1)
            f.readline()  # drop the remainder of the line owned by the previous range
        pos = f.tell()
        while pos < end:
            line = f.readline()
            if not line:
                break  # end of file
            pos += len(line)
            match = ip_re.search(line)
            if match:
                ip = match.group(1)
                # Return native strings on both Python 2 (bytes is str) and 3.
                found.add(ip if isinstance(ip, str) else ip.decode('ascii'))
    return found
# Phase 1: scan the log as 12 byte ranges on a pool of 4 worker processes.
p = Pool(4)
results = []
for i in range(12):
    # Floor division keeps the offsets integral; with "/" they become floats
    # under true division, which file.seek() rejects.
    result = p.apply_async(run, args=(i * total // 12, (i + 1) * total // 12))
    results.append(result)
p.close()
p.join()
t.join()  # the IP database thread must finish before ``dic`` is consulted

# Merge the per-range sets into one set of distinct addresses.
filset = set()
for result in results:
    filset |= result.get()
sumfil = len(filset)
filist = list(filset)  # a list, so the lookup phase can slice it by index
def refn(start, end):
    """Return the ``dic`` key of each address in ``filist[start:end]``.

    An address matches a key when the address minus its last dotted
    component is a member of that key's prefix set. One list entry is
    produced per (address, matching key) pair, so an address absent from
    every set contributes nothing.

    Args:
        start: First index into the module-level ``filist``.
        end: Index one past the last entry to process.

    Returns:
        A list of matching ``dic`` keys, in address order and then ``dic``
        iteration order.
    """
    matches = []
    for ip in filist[start:end]:
        # Computed once per address instead of once per (address, key) pair;
        # rpartition yields "" for a string without a dot rather than raising.
        prefix = ip.rpartition('.')[0]
        matches.extend(k for k in dic if prefix in dic[k])
    return matches
# Phase 2: map the distinct addresses to keys of ``dic`` as 8 slices on a pool
# of 4 worker processes.
p = Pool(4)
results = []
for i in range(8):
    # Floor division: slice bounds must be integers under true division too.
    result = p.apply_async(refn, args=(i * sumfil // 8, (i + 1) * sumfil // 8))
    results.append(result)
p.close()
p.join()

fn = []
for result in results:
    fn += result.get()

# Share of each key as a percentage of the number of distinct addresses.
# When sumfil is 0 the counter is empty, so the division is never evaluated.
fdic = Counter(fn)
ret = [{'n': k, 'v': fdic[k] / float(sumfil) * 100} for k in fdic]

# Highest share first; a plain descending sort gives the same ordering as
# asking heapq.nlargest for every element.
sortl = sorted(ret, key=lambda s: s['v'], reverse=True)
for i in sortl:
    # Single parenthesised argument: valid as a print statement on Python 2
    # and as a function call on Python 3.
    print(i['n'] + ' ' + '%.2f' % round(i['v'], 2) + '%')
上一篇 搜索 下一篇